#updated 11 27.2010 # reanalysis using BIC model selection # Analysis of initial size distributions #======================================================= #READING FILES #======================================================= isize=read.csv("July 2007 census and biomass2 edited.csv",header = T,na.strings=c("NA","na")) #converting variables in "isize" data to numeric/categorical variables isize$comp = as.factor(isize$comp) isize$herb = as.factor(isize$herb) isize$species = as.factor(isize$species) #isize$no.leaves07=as.numeric(isize$no.leaves07) isize$veg.bio = as.numeric(isize$veg.bio) isize$bolt08= as.factor(isize$bolt) # figure out which rows have the ugly "" factor levels isize[isize$Comp=="",5:10] = NA # relevel competition factor isize$Comp = factor(isize$Comp,levels=c("Low","Medium","High")) isize$Species = factor(isize$Species,levels=c("BT","TT")) isize$Herb = factor(isize$Herb,levels=c("Ambient","Reduced")) # discard rows with missing treatment values - mostly empty rows # also missing no.leaves07, and make sure no.leaves07 > 0 isize = isize[complete.cases(isize[,5:11]) & isize[,"no.leaves07"]>0,] library(lme4) #====================================================== #Visualizing the data #====================================================== ##Separating the data by species (bull vs. tall) isize.bull=isize[isize$species==0,] isize.tall=isize[isize$species==1,] old.par = par(mfrow=c(2,2)) hist(isize.bull$no.leaves07) hist(isize.tall$no.leaves07) hist(isize.bull$no.leaves08) hist(isize.tall$no.leaves08) par(old.par) library(lattice) stripplot(no.leaves07~Comp|Herb+Species,data=isize,jitter.data=TRUE) bwplot(no.leaves07~Comp|Herb+Species,data=isize) # =========================================================================== # modelling initial size distribution - no.leaves07 - Bull Thistle # =========================================================================== # assumes that growth models have been fitted, check for appropriate models if (!exists("bb.fits")) stop("fit growth models first!") # now do it using lme4 to include random effects and bolting # USE BIC model selection # make a unique "plot" identifier isize.bull$plot = paste(isize.bull$block,isize.bull$plot,sep=".") isb.mm0 <- lmer(no.leaves07~Herb * Comp + (1|block) + (1|plot),data=isize.bull) isb.resid = resid(isb.mm0) plot(fitted(isb.mm0),isb.resid) # looks heteroscedastic isb.mm0 <- lmer(log(no.leaves07)~Herb * Comp + (1|block) + (1|plot),data=isize.bull) isb.resid = resid(isb.mm0) plot(fitted(isb.mm0),isb.resid) # better qqnorm(isb.resid) # pooh. qqline(isb.resid) library(MASS) boxcox(no.leaves07~Herb * Comp,data=isize.bull) # pretty close to a log transform ... ks.test(isb.resid,"pnorm") # yuck # check a range of random effects structures isb.mm1 <- lmer(log(no.leaves07)~Herb * Comp + (1|block),data=isize.bull) isb.mm2 <- lmer(log(no.leaves07)~Herb * Comp + (1|plot),data=isize.bull) isb.mm3 <- lmer(log(no.leaves07)~Herb * Comp + (Herb|block) + (1|plot),data=isize.bull) isb.mm4 <- lmer(log(no.leaves07)~Herb * Comp + (1|block) + (Herb|plot),data=isize.bull) isb.mm5 <- lmer(log(no.leaves07)~Herb * Comp + (Comp|block) + (1|plot),data=isize.bull) isb.mm6 <- lmer(log(no.leaves07)~Herb * Comp + (1|block) + (Comp|plot),data=isize.bull) sapply(list(isb.mm0,isb.mm1,isb.mm2,isb.mm3,isb.mm4,isb.mm5,isb.mm6),BIC) # simple block and plot effects are the best # create model set - searching for best approximating model isb.models = list("~Herb + Comp + Herb:Comp", "~Herb + Comp", "~Herb", "~Comp", "~1") isb.fits = lapply(isb.models,function(ff)lmer(paste("log(no.leaves07)",ff,"+ (1|block) + (1|plot)"),data=isize.bull)) bic = unlist(sapply(isb.fits,BIC)) dbic = bic-min(bic) wbic = exp(-dbic/2)/sum(exp(-dbic/2)) isb.wbic = wbic data.frame(unlist(isb.models),dbic,wbic)[order(dbic),] # model 4 very strong, model 2 close, but could probably argue nesting, use model 4 fixef(isb.fits[[4]]) nd = data.frame(Comp=factor(c("Low","Medium","High"),levels=levels(isize.tall$Comp))) exp(model.matrix(~Comp,data=nd) %*% fixef(isb.fits[[4]])) # so the distribution from model 4 only is given by isb.means = model.matrix(~Comp,data=nd) %*% fixef(isb.fits[[4]]) # these are the means on the log scale isb.sd = attr(VarCorr(isb.fits[[4]]),"sc") # this is the standard deviation on the log scale # Now predict the biomass distribution # need to use the bb.fits models, I think, which means substantial model selection uncertainty bic = unlist(sapply(bb.fits,BIC)) dbic = bic-min(bic) bb.wbic = exp(-dbic/2)/sum(exp(-dbic/2)) ### weights of all 175 model combinations as a matrix all.wbic = outer(isb.wbic,bb.wbic) nd = data.frame(Comp=factor(rep(c("Low","Medium","High"),each=2),levels=levels(isize.bull$Comp)), Herb=factor(rep(c("Ambient","Reduced"),3))) isb.results = array(NA,dim=c(length(isb.fits),nrow(nd))) isb.se = matrix(NA,nrow=length(isb.fits),ncol=nrow(nd)) for (i in 1:length(isb.fits)){ mm.isb = model.matrix(as.formula(isb.models[[i]]),data=nd) fe = fixef(isb.fits[[i]]) vcf = vcov(isb.fits[[i]]) isb.results[i,] = mm.isb %*% fe for (j in 1:nrow(mm.isb)){ pick = which(mm.isb[j,]>0) isb.se[i,j] = sqrt(sum(vcf[pick,pick])) } } isb.rv = sapply(isb.fits,function(ff)attr(VarCorr(ff),"sc")) # now make a new dataframe with all the predictions from each model as the inputs nd = data.frame(no.leaves08=as.vector(isb.results),Comp=rep(nd$Comp,each=5),Herb=rep(nd$Herb,each=5),bolt08=0) bb.results = array(NA,dim=c(length(bb.fits),nrow(nd))) bb.se = matrix(NA,nrow=length(bb.fits),ncol=nrow(nd)) for (i in 1:length(bb.fits)){ mm.bb = model.matrix(as.formula(bb.models[[i]]),data=nd) fe = fixef(bb.fits[[i]]) vcf = vcov(bb.fits[[i]]) bb.results[i,] = mm.bb %*% fe for (j in 1:nrow(mm.bb)){ pick = which(mm.bb[j,]>0) bb.se[i,j] = sqrt(sum(vcf[pick,pick])) } } bb.rv = sapply(bb.fits,function(ff)attr(VarCorr(ff),"sc")) # OK that was efficient, but now its packed all funny, so can't use all.wbic directly. # have to repeat isb.wbic each = 6 I think. all.wbic = outer(bb.wbic,rep(isb.wbic,each=6)) sum(all.wbic) # == 6 # and now multiply bb.results by all.wbic and then sum by columns ... or something bb.means = apply(matrix(apply(all.wbic * bb.results,2,sum),nrow=6),1,sum) ### WOOOHOOO - and your guess is as good as mine what is what in that vector # and now the nasty part - getting the unconditional variances # first get the unconditional variances for the 30 predictions from the isb models isb.means = apply(isb.wbic * isb.results,2,sum) t1 = sweep(isb.results,2,isb.means) t2 = as.vector(isb.se^2 + t1^2) # t2 is the var for each row, unweighted by the probability, so add this to that derived from the t1 = sweep(bb.results,2,rep(bb.means,each=5)) # replicate each mean to match the number of isb models t2 = all.wbic*sqrt(bb.se^2 + t1^2 + rep(t2,each=35)) bb.ucse = apply(matrix(apply(t2,2,sum),nrow=6),1,sum) # OK now the residual variance - same for all six conditions all.wbic = outer(bb.wbic,isb.wbic) bb.ucrv = sum(all.wbic * (rep(bb.rv^2,times=5) + rep(isb.rv^2,each=35))) # and for all intents we can ignore the variance in this estimate, which # is approximately bb.ucrv^2 *2/268 # where 268 is the sample size, and assumes that kurtosis is zero. #bb.means refers to log.size #this tells us to what treatment the bb.means data refer to nd = data.frame(Comp=factor(rep(c("Low","Medium","High"),each=2),levels=levels(isize.bull$Comp)), Herb=factor(rep(c("Ambient","Reduced"),3))) IPM.parameters$size.dist.unconditional.se[1:12]=NA for (i in c(1,3,5)) {IPM.parameters$size.dist.mean[IPM.parameters$species=="BT" & IPM.parameters$herb=="Ambient"& IPM.parameters$comp==nd[i,"Comp"]]=bb.means[i]; IPM.parameters$size.dist.mean[IPM.parameters$species=="BT" & IPM.parameters$herb=="Reduced"& IPM.parameters$comp==nd[i+1,"Comp"]]=bb.means[i+1]} for (i in c(1,3,5)) {IPM.parameters$size.dist.unconditional.se[IPM.parameters$species=="BT" & IPM.parameters$herb=="Ambient"& IPM.parameters$comp==nd[i,"Comp"]]=bb.ucse[i]; IPM.parameters$size.dist.unconditional.se[IPM.parameters$species=="BT" & IPM.parameters$herb=="Reduced"& IPM.parameters$comp==nd[i+1,"Comp"]]=bb.ucse[i+1]} IPM.parameters$size.dist.resse[1:6]=sqrt(bb.ucrv) # ======================================================================================= # model initial size distribution for Tall Thistle # ======================================================================================= isize.tall$plot = paste(isize.tall$block,isize.tall$plot,sep=".") ist.mm0 <- lmer(no.leaves07~Herb * Comp + (1|block) + (1|plot),data=isize.tall) ist.resid = resid(ist.mm0) plot(fitted(ist.mm0),ist.resid) # looks heteroscedastic ist.mm0 <- lmer(log(no.leaves07)~Herb * Comp + (1|block) + (1|plot),data=isize.tall) ist.resid = resid(ist.mm0) plot(fitted(ist.mm0),ist.resid) # better qqnorm(ist.resid) # pooh. qqline(ist.resid) library(MASS) boxcox(no.leaves07~Herb * Comp,data=isize.tall) # pretty close to a log transform ... ks.test(ist.resid,"pnorm") # yuck - but need to use it to make the simulation work later # check a range of random effects structures ist.mm1 <- lmer(log(no.leaves07)~Herb * Comp + (1|block),data=isize.tall) ist.mm2 <- lmer(log(no.leaves07)~Herb * Comp + (1|plot),data=isize.tall) ist.mm3 <- lmer(log(no.leaves07)~Herb * Comp + (Herb|block) + (1|plot),data=isize.tall) ist.mm4 <- lmer(log(no.leaves07)~Herb * Comp + (1|block) + (Herb|plot),data=isize.tall) ist.mm5 <- lmer(log(no.leaves07)~Herb * Comp + (Comp|block) + (1|plot),data=isize.tall) ist.mm6 <- lmer(log(no.leaves07)~Herb * Comp + (1|block) + (Comp|plot),data=isize.tall) sapply(list(ist.mm0,ist.mm1,ist.mm2,ist.mm3,ist.mm4,ist.mm5,ist.mm6),BIC) # simple plot effects are the best # create model set - searching for best approximating model ist.models = list("~Herb + Comp + Herb:Comp", "~Herb + Comp", "~Herb", "~Comp", "~1") ist.fits = lapply(ist.models,function(ff)lmer(paste("log(no.leaves07)",ff," + (1|plot)"),data=isize.tall)) bic = unlist(sapply(ist.fits,BIC)) dbic = bic-min(bic) wbic = exp(-dbic/2)/sum(exp(-dbic/2)) ist.wbic = wbic data.frame(unlist(ist.models),dbic,wbic)[order(dbic),] # model 4 all the way fixef(ist.fits[[4]]) nd = data.frame(Comp=factor(c("Low","Medium","High"),levels=levels(isize.tall$Comp))) exp(model.matrix(~Comp,data=nd) %*% fixef(ist.fits[[4]])) # so the distribution is given by ist.means = model.matrix(~Comp,data=nd) %*% fixef(ist.fits[[4]]) # these are the means on the log scale ist.sd = attr(VarCorr(ist.fits[[4]]),"sc") # this is the standard deviation on the log scale # Now predict the biomass distribution # need to use the tb.fits models, I think, which means substantial model selection uncertainty bic = unlist(sapply(tb.fits,BIC)) dbic = bic-min(bic) tb.wbic = exp(-dbic/2)/sum(exp(-dbic/2)) ### weights of all 175 model combinations as a matrix all.wbic = outer(ist.wbic,tb.wbic) nd = data.frame(Comp=factor(rep(c("Low","Medium","High"),each=2),levels=levels(isize.tall$Comp)), Herb=factor(rep(c("Ambient","Reduced"),3))) ist.results = array(NA,dim=c(length(ist.fits),nrow(nd))) ist.se = matrix(NA,nrow=length(ist.fits),ncol=nrow(nd)) for (i in 1:length(isb.fits)){ mm.ist = model.matrix(as.formula(ist.models[[i]]),data=nd) fe = fixef(ist.fits[[i]]) vcf = vcov(ist.fits[[i]]) ist.results[i,] = mm.ist %*% fe for (j in 1:nrow(mm.ist)){ pick = which(mm.ist[j,]>0) ist.se[i,j] = sqrt(sum(vcf[pick,pick])) } } ist.rv = sapply(ist.fits,function(ff)attr(VarCorr(ff),"sc")) # now make a new dataframe with all the predictions from each model as the inputs nd = data.frame(no.leaves08=as.vector(ist.results),Comp=rep(nd$Comp,each=5),Herb=rep(nd$Herb,each=5),bolt08=0) tb.results = array(NA,dim=c(length(tb.fits),nrow(nd))) tb.se = matrix(NA,nrow=length(tb.fits),ncol=nrow(nd)) for (i in 1:length(tb.fits)){ mm.tb = model.matrix(as.formula(tb.models[[i]]),data=nd) fe = fixef(tb.fits[[i]]) vcf = vcov(tb.fits[[i]]) tb.results[i,] = mm.tb %*% fe for (j in 1:nrow(mm.tb)){ pick = which(mm.tb[j,]>0) tb.se[i,j] = sqrt(sum(vcf[pick,pick])) } } tb.rv = sapply(tb.fits,function(ff)attr(VarCorr(ff),"sc")) # OK that was efficient, but now its packed all funny, so can't use all.wbic directly. # have to repeat isb.wbic each = 6 I think. all.wbic = outer(tb.wbic,rep(ist.wbic,each=6)) sum(all.wbic) # == 6 # and now multiply tb.results by all.wbic and then sum by columns ... or something tb.means = apply(matrix(apply(all.wbic * tb.results,2,sum),nrow=6),1,sum) ### WOOOHOOO - and your guess is as good as mine what is what in that vector # and now the nasty part - getting the unconditional variances # first get the unconditional variances for the 30 predictions from the isb models ist.means = apply(isb.wbic * isb.results,2,sum) t1 = sweep(ist.results,2,ist.means) t2 = as.vector(ist.se^2 + t1^2) # t2 is the var for each row, unweighted by the probability, so add this to that derived from the t1 = sweep(tb.results,2,rep(tb.means,each=5)) # replicate each mean to match the number of ist models t2 = all.wbic*sqrt(tb.se^2 + t1^2 + rep(t2,each=35)) tb.ucse = apply(matrix(apply(t2,2,sum),nrow=6),1,sum) # OK now the residual variance - same for all six conditions all.wbic = outer(tb.wbic,ist.wbic) tb.ucrv = sum(all.wbic * (rep(bb.rv^2,times=5) + rep(ist.rv^2,each=35))) # and for all intents we can ignore the variance in this estimate, which # is approximately tb.ucrv^2 *2/268 # where 268 is the sample size, and assumes that kurtosis is zero. #tb.means refers to log.size #this tells us to what treatment the bb.means data refer to nd = data.frame(Comp=factor(rep(c("Low","Medium","High"),each=2),levels=levels(isize.bull$Comp)), Herb=factor(rep(c("Ambient","Reduced"),3))) for (i in c(1,3,5)) {IPM.parameters$size.dist.mean[IPM.parameters$species=="TT" & IPM.parameters$herb=="Ambient"& IPM.parameters$comp==nd[i,"Comp"]]=tb.means[i]; IPM.parameters$size.dist.mean[IPM.parameters$species=="TT" & IPM.parameters$herb=="Reduced"& IPM.parameters$comp==nd[i+1,"Comp"]]=tb.means[i+1]} for (i in c(1,3,5)) {IPM.parameters$size.dist.unconditional.se[IPM.parameters$species=="TT" & IPM.parameters$herb=="Ambient"& IPM.parameters$comp==nd[i,"Comp"]]=tb.ucse[i]; IPM.parameters$size.dist.unconditional.se[IPM.parameters$species=="TT" & IPM.parameters$herb=="Reduced"& IPM.parameters$comp==nd[i+1,"Comp"]]=tb.ucse[i+1]} IPM.parameters$size.dist.resse[7:12]=sqrt(tb.ucrv) write.csv(IPM.parameters,"InitSizeDistribution.Parameters.csv",row.names = FALSE)